Author

Daniela Palleschi

Code
# set global knit options
# (spell out TRUE/FALSE rather than T/F: T and F are ordinary variables
# that can be reassigned, so the full names are safer)
knitr::opts_chunk$set(echo = TRUE,
                      eval = TRUE,
                      error = FALSE,
                      warning = FALSE,
                      message = FALSE,
                      cache = TRUE)

# suppress scientific notation
options(scipen = 999)

# list of required packages
packages <- c( #"SIN", # this package was removed from the CRAN repository
               "MASS", "dplyr", "tidyr", "purrr", "extraDistr", "ggplot2", "loo", "bridgesampling", "brms", "bayesplot", "tictoc", "hypr", "bcogsci", "papaja", "grid", "kableExtra", "gridExtra", "lme4", "cowplot", "pdftools", "cmdstanr", "rootSolve", "rstan"
  )

# NB: if you haven't already installed bcogsci through devtools, it won't be loaded
## Load each package, installing it first if not yet available.
## (require() returns FALSE instead of erroring on a missing package,
## which is what makes this install-or-load idiom work here.)
package.check <- lapply(
  packages,
  FUN = function(x) {
    if (!require(x, character.only = TRUE)) {
      install.packages(x, dependencies = TRUE)
      library(x, character.only = TRUE)
    }
  }
)

# this is also required, taken from the textbook

## Save compiled models:
rstan_options(auto_write = FALSE)
## Parallelize the chains using all the cores:
options(mc.cores = parallel::detectCores())
# To solve some conflicts between packages: MASS::select and
# rstan/tidyr would otherwise mask the versions we want
select <- dplyr::select
extract <- rstan::extract

Set options

# set condition so that when we knit a single child Rmd we also get the references for just that section
## parent.Rmd sets 'knit_type' to 'parent'; if this is a child knit then 'knit_type' does not exist yet

# if 'knit_type' already exists it was created in 'parent.Rmd' (a parent knit),
# so keep it unchanged; otherwise this is a stand-alone knit, so call it "chapter"
# (use <- for assignment and plain if/else rather than scalar ifelse())
if (!exists("knit_type")) knit_type <- "chapter"
Code
# set global knit options
# (spell out TRUE/FALSE rather than T/F: T and F are ordinary variables
# that can be reassigned, so the full names are safer)
knitr::opts_chunk$set(echo = TRUE,
                      eval = TRUE,
                      error = FALSE,
                      warning = FALSE,
                      message = FALSE,
                      cache = TRUE)

# suppress scientific notation
options(scipen = 999)

# list of required packages
packages <- c( #"SIN", # this package was removed from the CRAN repository
               "MASS", "dplyr", "tidyr", "purrr", "extraDistr", "ggplot2", "loo", "bridgesampling", "brms", "bayesplot", "tictoc", "hypr", "bcogsci", "papaja", "grid", "kableExtra", "gridExtra", "lme4", "cowplot", "pdftools", "cmdstanr", "rootSolve", "rstan"
  )

# NB: if you haven't already installed bcogsci through devtools, it won't be loaded
## Load each package, installing it first if not yet available.
## (require() returns FALSE instead of erroring on a missing package,
## which is what makes this install-or-load idiom work here.)
package.check <- lapply(
  packages,
  FUN = function(x) {
    if (!require(x, character.only = TRUE)) {
      install.packages(x, dependencies = TRUE)
      library(x, character.only = TRUE)
    }
  }
)

# this is also required, taken from the textbook

## Save compiled models:
rstan_options(auto_write = FALSE)
## Parallelize the chains using all the cores:
options(mc.cores = parallel::detectCores())
# To solve some conflicts between packages: MASS::select and
# rstan/tidyr would otherwise mask the versions we want
select <- dplyr::select
extract <- rstan::extract

1 Chapter 4

1.1 Exercise 4.1 - A simple linear regression: Power posing and testosterone.

Load the following data set:

# load the power-pose data set (shipped with the bcogsci package)
data("df_powerpose")
# peek at the first rows
head(df_powerpose)
  id hptreat female age testm1  testm2
2 29    High   Male  19 38.725  62.375
3 30     Low Female  20 32.770  29.235
4 31    High Female  20 32.320  27.510
5 32     Low Female  18 17.995  28.655
7 34     Low Female  21 73.580  44.670
8 35    High Female  20 80.695 105.485

The data set, which was originally published in Carney, Cuddy, and Yap (2010) but released in modified form by Fosse (2016), shows the testosterone levels of 39 different individuals, before and after treatment, where treatment refers to each individual being assigned to a high power pose or a low power pose. In the original paper by Carney, Cuddy, and Yap (2010), the unit given for testosterone measurement (estimated from saliva samples) was picograms per milliliter (pg/ml). One picogram per milliliter is 0.001 nanogram per milliliter (ng/ml).

The research hypothesis is that on average, assigning a subject a high power pose vs. a low power pose will lead to higher testosterone levels after treatment. Assuming that you know nothing about normal ranges of testosterone using salivary measurement, choose an appropriate Cauchy prior (e.g., \(Cauchy(0,2.5)\)) for the target parameter(s).

Investigate this claim using a linear model and the default priors of brms. You’ll need to estimate the effect of a new variable that encodes the change in testosterone.

1.1.1 My work

  • DV: change in testosterone levels (continuous, testm2-testm1)
  • IV: hptreat (binomial: high/low)
# create change variable: post- minus pre-treatment testosterone (pg/ml)
df_powerpose <- df_powerpose %>%
  mutate(change = testm2-testm1)
# inspect the new column
head(df_powerpose)
  id hptreat female age testm1  testm2     change
2 29    High   Male  19 38.725  62.375  23.650002
3 30     Low Female  20 32.770  29.235  -3.534999
4 31    High Female  20 32.320  27.510  -4.810000
5 32     Low Female  18 17.995  28.655  10.660000
7 34     Low Female  21 73.580  44.670 -28.910004
8 35    High Female  20 80.695 105.485  24.790000
# eyeball DV distribution (roughly symmetric, centred near 0)
hist(df_powerpose$change)

# order factor so that Low is the reference (first) level
df_powerpose$hptreat <- factor(df_powerpose$hptreat, levels = c("Low", "High"))
levels(df_powerpose$hptreat)
[1] "Low"  "High"
# set ±0.5 sum contrasts so the slope estimates the High-minus-Low difference
contrasts(df_powerpose$hptreat) <- c(-0.5,+0.5)
contrasts(df_powerpose$hptreat)
     [,1]
Low  -0.5
High  0.5
# fit model: simple linear regression of testosterone change on treatment,
# using the brms default priors (as the exercise instructs)
fit_powerpose <- brm(
  change ~ 1 + hptreat,
  data = df_powerpose,
  family = gaussian()
)
# explore model (prints posterior summaries)
fit_powerpose
 Family: gaussian 
  Links: mu = identity; sigma = identity 
Formula: change ~ 1 + hptreat 
   Data: df_powerpose (Number of observations: 39) 
  Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
         total post-warmup draws = 4000

Population-Level Effects: 
          Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept    -0.02      3.34    -6.77     6.49 1.00     3743     3138
hptreat1      8.82      6.72    -4.79    22.02 1.00     3717     2767

Family Specific Parameters: 
      Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma    20.55      2.39    16.58    25.81 1.00     3483     2795

Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
# trace plots and marginal posterior densities
plot(fit_powerpose)

# posterior predictive check against 100 posterior draws
pp_check(fit_powerpose, ndraws=100)

1.2 Exercise 4.2 - Another linear regression model: Revisiting attentional load effect on pupil size.

Here, we revisit the analysis shown in the chapter, on how attentional load affects pupil size.

  1. Our priors for this experiment were quite arbitrary. What do the prior predictive distributions look like? Do they make sense?
  2. Is our posterior distribution sensitive to the priors that we selected? Perform a sensitivity analysis to find out whether the posterior is affected by our choice of prior for \(\sigma\).
  3. Our data set includes also a column that indicates the trial number. Could it be that trial has also an effect on the pupil size? As in lm, we indicate another main effect with a + sign. How would you communicate the new results?

1.3 Exercise 4.3 - Log-normal model: Revisiting the effect of trial on finger tapping times

We continue considering the effect of trial on finger tapping times.

  1. Estimate the slowdown in milliseconds between the last two times the subject pressed the space bar in the experiment.
  2. How would you change your model (keeping the log-normal likelihood) so that it includes centered log-transformed trial numbers or square-root-transformed trial numbers (instead of centered trial numbers)? Does the effect in milliseconds change?

1.4 Exercise 4.4 - Logistic regression: Revisiting the effect of set size on free recall.

Our data set includes also a column coded as tested that indicates the position of the queued word. (In Figure 4.13 tested would be 3). Could it be that position also has an effect on recall accuracy? How would you incorporate this in the model? (We indicate another main effect with a + sign).

1.5 Exercise 4.5 - Red is the sexiest color

Load the following data set:

# load the clothing-color data set (shipped with the bcogsci package)
data("df_red")
# peek at the first rows
head(df_red)
   risk age red pink redorpink
8     0  19   0    0         0
9     0  25   0    0         0
10    0  20   0    0         0
11    0  20   0    0         0
14    0  20   0    0         0
15    0  18   0    0         0

The data set is from a study (Beall and Tracy 2013) that contains information about the color of the clothing worn (red, pink, or red or pink) when the subject (female) is at risk of becoming pregnant (is ovulating, self-reported). The broader issue being investigated is whether women wear red more often when they are ovulating (in order to attract a mate). Using logistic regressions, fit three different models to investigate whether being ovulating increases the probability of wearing (a) red, (b) pink, or (c) either pink or red. Use priors that are reasonable (in your opinion).

Set options

Code
# set global knit options
# (spell out TRUE/FALSE rather than T/F: T and F are ordinary variables
# that can be reassigned, so the full names are safer)
knitr::opts_chunk$set(echo = TRUE,
                      eval = TRUE,
                      error = FALSE,
                      warning = FALSE,
                      message = FALSE,
                      cache = TRUE)

# suppress scientific notation
options(scipen = 999)

# list of required packages
packages <- c( #"SIN", # this package was removed from the CRAN repository
               "MASS", "dplyr", "tidyr", "purrr", "extraDistr", "ggplot2", "loo", "bridgesampling", "brms", "bayesplot", "tictoc", "hypr", "bcogsci", "papaja", "grid", "kableExtra", "gridExtra", "lme4", "cowplot", "pdftools", "cmdstanr", "rootSolve", "rstan"
  )

# NB: if you haven't already installed bcogsci through devtools, it won't be loaded
## Load each package, installing it first if not yet available.
## (require() returns FALSE instead of erroring on a missing package,
## which is what makes this install-or-load idiom work here.)
package.check <- lapply(
  packages,
  FUN = function(x) {
    if (!require(x, character.only = TRUE)) {
      install.packages(x, dependencies = TRUE)
      library(x, character.only = TRUE)
    }
  }
)

# this is also required, taken from the textbook

## Save compiled models:
rstan_options(auto_write = FALSE)
## Parallelize the chains using all the cores:
options(mc.cores = parallel::detectCores())
# To solve some conflicts between packages: MASS::select and
# rstan/tidyr would otherwise mask the versions we want
select <- dplyr::select
extract <- rstan::extract

2 Chapter 5 - Bayesian hierarchical models

2.1 Exercise 5.1 A hierarchical model (normal likelihood) of cognitive load on pupil size.

As in section 4.1, we focus on the effect of cognitive load on pupil size, but this time we look at all the subjects of Wahn et al. (2016):

# load the complete pupil-size data set (all subjects; bcogsci package)
data("df_pupil_complete")
df_pupil_complete
# A tibble: 2,228 × 4
    subj trial  load p_size
   <int> <int> <int>  <dbl>
 1   701     1     2  1021.
 2   701     2     1   951.
 3   701     3     5  1064.
 4   701     4     4   913.
 5   701     5     0   603.
 6   701     6     3   826.
 7   701     7     0   464.
 8   701     8     4   758.
 9   701     9     2   733.
10   701    10     3   591.
# ℹ 2,218 more rows

You should now be able to fit a “maximal” model (correlated varying intercept and slopes for subjects) assuming a normal likelihood. Base your priors on the priors discussed in section 4.1.

Maximal model
  • first centre the predictor
# centre the load predictor so the intercept is the grand-mean pupil size
df_pupil_complete <- df_pupil_complete %>%
  mutate(c_load = load-mean(load))
  • then run the model
# "maximal" model: correlated by-subject intercepts and slopes for centred
# load, normal likelihood, with priors based on section 4.1 of the book
fit_pupil <- brm(p_size ~ 1 + c_load + 
                   (1 +c_load | subj), 
                 data = df_pupil_complete,
                 family = gaussian(),
                 prior = c(
                   prior(normal(1000, 500), class = Intercept),
                   prior(normal(0, 1000), class = sigma),
                   prior(normal(0, 100), class = b, coef = c_load),
                   prior(normal(0, 1000), class = sd),
                   prior(lkj(2), class = cor)
                 ) )
Warning: There were 6 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
https://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
Warning: Examine the pairs() plot to diagnose sampling problems
  1. Examine the effect of load on pupil size, and the average pupil size. What do you conclude?
My solution
fit_pupil
 Family: gaussian 
  Links: mu = identity; sigma = identity 
Formula: p_size ~ 1 + c_load + (1 + c_load | subj) 
   Data: df_pupil_complete (Number of observations: 2228) 
  Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
         total post-warmup draws = 4000

Group-Level Effects: 
~subj (Number of levels: 20) 
                      Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
sd(Intercept)          3321.35    426.49  2553.97  4207.68 1.00      656
sd(c_load)               72.15     15.68    47.37   107.83 1.00     1158
cor(Intercept,c_load)     0.31      0.24    -0.21     0.71 1.00     1200
                      Tail_ESS
sd(Intercept)              906
sd(c_load)                1994
cor(Intercept,c_load)     2058

Population-Level Effects: 
          Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept  2456.93    482.09  1465.40  3359.21 1.01      659     1122
c_load       38.60     25.65   -14.75    86.89 1.00      986     1511

Family Specific Parameters: 
      Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma   505.07      7.65   490.20   520.69 1.00     5140     2878

Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
  • 95% CrI crosses 0: quite a bit of uncertainty

  • alternatively, to only print c_load:

posterior_summary(fit_pupil, variable = "b_c_load")
         Estimate Est.Error      Q2.5    Q97.5
b_c_load 38.59823  25.65159 -14.75341 86.89461
  • but the intercept is quite large:
posterior_summary(fit_pupil, variable = "b_Intercept")
            Estimate Est.Error     Q2.5    Q97.5
b_Intercept 2456.935  482.0879 1465.397 3359.207

:::

  • even though we assumed it wouldn’t be
brms::prior_summary(fit_pupil)
                prior     class      coef group resp dpar nlpar lb ub
               (flat)         b                                      
       normal(0, 100)         b    c_load                            
    normal(1000, 500) Intercept                                      
 lkj_corr_cholesky(2)         L                                      
 lkj_corr_cholesky(2)         L            subj                      
      normal(0, 1000)        sd                                  0   
      normal(0, 1000)        sd            subj                  0   
      normal(0, 1000)        sd    c_load  subj                  0   
      normal(0, 1000)        sd Intercept  subj                  0   
      normal(0, 1000)     sigma                                  0   
       source
      default
         user
         user
         user
 (vectorized)
         user
 (vectorized)
 (vectorized)
 (vectorized)
         user
  • overly informative prior for the intercept?
  1. Do a sensitivity analysis for the prior on the intercept (\(\alpha\)). What is the estimate of the effect (\(\beta\)) under different priors?
My solution
  • try a wider prior for \(\alpha\); I’m choosing 3000 and 1000ms

\[ \alpha \sim Normal(3000,1000) \]

# sensitivity analysis: refit with a wider intercept prior, Normal(3000, 1000)
fit_pupil_2 <- brm(p_size ~ 1 + c_load + 
                   (1 + c_load | subj), 
                 data = df_pupil_complete,
                 family = gaussian(),
                 prior = c(
                   prior(normal(3000, 1000), class = Intercept),
                   prior(normal(0, 1000), class = sigma),
                   prior(normal(0, 100), class = b, coef = c_load),
                   prior(normal(0, 1000), class = sd),
                   prior(lkj(2), class = cor)
                 ) )
# posterior of the load effect under the new intercept prior
posterior_summary(fit_pupil_2, variable = "b_c_load")
         Estimate Est.Error     Q2.5    Q97.5
b_c_load 55.65513  17.61118 19.96622 88.59905
  • the effect of c_load is now estimated with more certainty
posterior_summary(fit_pupil, variable = "b_Intercept")
            Estimate Est.Error     Q2.5    Q97.5
b_Intercept 2456.935  482.0879 1465.397 3359.207
  1. Is the effect of load consistent across subjects? Investigate this visually.
My solution
  • check out overall effect; there are 3 peaks
pp_check(fit_pupil_2, ndraws = 50, type = "dens_overlay")

  • now by participants
# posterior predictive density overlays, faceted by participant, to see
# whether the load effect / predictive fit is consistent across subjects
ppc_dens_overlay_grouped(df_pupil_complete$p_size,
  yrep =
    posterior_predict(fit_pupil_2,
      ndraws = 100
    ),
  group = df_pupil_complete$subj
) +
  # FIX: axis label was copy-pasted from an EEG (N400) example;
  # the observed variable here is pupil size
  xlab("Pupil size")

  • and by sd
# posterior predictive check of the by-subject standard deviations
pp_check(fit_pupil_2,
  type = "stat_grouped",
  ndraws = 1000,
  group = "subj",
  stat = "sd"
)

  • and also:
## For the hierarchical model this is more complicated, because the
## by-subject effect is the population-level effect (beta) plus each
## subject's adjustment.
# Extract the overall (population-level) effect:
beta <- c(as_draws_df(fit_pupil_2)$b_c_load)
# Extract the individual (by-subject) slope adjustments.
ind_effects_v <- paste0("r_subj[", unique(df_pupil_complete$subj), ",c_load]")
# BUG FIX: the adjustments must come from the same model as beta;
# the original mixed adjustments from fit_pupil with beta from fit_pupil_2.
adjustment <- as.matrix(as_draws_df(fit_pupil_2)[ind_effects_v])
# By-subject effects in a data frame: one column per subject.
by_subj_effect <- as_tibble(beta + adjustment)
# Summarize each column with its mean and 95% quantiles, then bind rows.
par_h <- lapply(by_subj_effect, function(x) {
  tibble(
    Estimate = mean(x),
    Q2.5 = quantile(x, .025),
    Q97.5 = quantile(x, .975)
  )
}) %>%
  bind_rows() %>%
  # Add the subject labels and order subjects by estimated effect size:
  mutate(subj = unique(df_pupil_complete$subj)) %>%
  arrange(Estimate) %>%
  mutate(subj = factor(subj, levels = subj))
# Plot each subject's effect with its 95% CrI, plus the population-level
# mean (solid) and 95% CrI (dotted) as horizontal reference lines.
ggplot(par_h,
       aes(
         ymin = Q2.5,
         ymax = Q97.5,
         x = subj,
         y = Estimate
       )) +
  geom_errorbar() +
  geom_point() +
  geom_hline(yintercept =
               posterior_summary(fit_pupil_2)["b_c_load", "Estimate"]) +
  geom_hline(
    yintercept =
      posterior_summary(fit_pupil_2)["b_c_load", "Q2.5"],
    linetype = "dotted",
    linewidth = .5
  ) +
  geom_hline(
    yintercept =
      posterior_summary(fit_pupil_2)["b_c_load", "Q97.5"],
    linetype = "dotted",
    linewidth = .5
  ) +
  coord_flip() +
  ylab("Change in pupil size") +
  xlab("Participant ID") +
  theme_bw()

2.2 Exercise 5.2 Are subject relatives easier to process than object relatives (log-normal likelihood)?

We begin with a classic question from the psycholinguistics literature: Are subject relatives easier to process than object relatives? The data come from Experiment 1 in a paper by Grodner and Gibson (2005).

Scientific question: Is there a subject relative advantage in reading?

Grodner and Gibson (2005) investigate an old claim in psycholinguistics that object relative clause (ORC) sentences are more difficult to process than subject relative clause (SRC) sentences. One explanation for this predicted difference is that the distance between the relative clause verb (sent in the example below) and the head noun phrase of the relative clause (reporter in the example below) is longer in ORC vs. SRC. Examples are shown below. The relative clause is shown in square brackets.

(1a) The reporter [who the photographer sent to the editor] was hoping for a good story. (ORC)

(1b) The reporter [who sent the photographer to the editor] was hoping for a good story. (SRC)

The underlying explanation has to do with memory processes: Shorter linguistic dependencies are easier to process due to either reduced interference or decay, or both. For implemented computational models that spell this point out, see Lewis and Vasishth (2005) and Engelmann, Jäger, and Vasishth (2020).

In the Grodner and Gibson data, the dependent measure is reading time at the relative clause verb, (e.g., sent) of different sentences with either ORC or SRC. The dependent variable is in milliseconds and was measured in a self-paced reading task. Self-paced reading is a task where subjects read a sentence or a short text word-by-word or phrase-by-phrase, pressing a button to get each word or phrase displayed; the preceding word disappears every time the button is pressed. In 6.1, we provide a more detailed explanation of this experimental method.

For this experiment, we are expecting longer reading times at the relative clause verbs of ORC sentences in comparison to the relative clause verb of SRC sentences.

data("df_gg05_rc")
df_gg05_rc
# A tibble: 672 × 7
    subj  item condition    RT residRT qcorrect experiment
   <int> <int> <chr>     <int>   <dbl>    <int> <chr>     
 1     1     1 objgap      320  -21.4         0 tedrg3    
 2     1     2 subjgap     424   74.7         1 tedrg2    
 3     1     3 objgap      309  -40.3         0 tedrg3    
 4     1     4 subjgap     274  -91.2         1 tedrg2    
 5     1     5 objgap      333   -8.39        1 tedrg3    
 6     1     6 subjgap     266  -87.3         1 tedrg2    
 7     1     7 objgap      327  -42.2         1 tedrg3    
 8     1     8 subjgap     279  -90.2         1 tedrg2    
 9     1     9 objgap      342  -23.2         1 tedrg3    
10     1    10 subjgap     297  -52.3         0 tedrg2    
# ℹ 662 more rows

You should use a sum coding for the predictors. Here, object relative clauses (“objgaps”) are coded +1, subject relative clauses -1.

Set contrasts
# make condition a factor with subjgap as the reference (first) level
df_gg05_rc$condition <- factor(df_gg05_rc$condition, levels = c("subjgap","objgap"))
class(df_gg05_rc$condition)
[1] "factor"
# ±0.5 sum contrasts: slope = objgap-minus-subjgap difference
# NOTE(review): the exercise text above asks for +1/-1 coding; ±0.5 (as in
# the book) halves the coefficient relative to ±1 coding — confirm intended
contrasts(df_gg05_rc$condition) <- c(-.5,+.5)
contrasts(df_gg05_rc$condition)
        [,1]
subjgap -0.5
objgap   0.5
  • or, as in the book:
# alternative to contrasts(): an explicit ±0.5 numeric predictor, as in the book
df_gg05_rc <- df_gg05_rc %>%
  mutate(c_cond = if_else(condition == "objgap", .5, -.5))

You should be able to now fit a “maximal” model (correlated varying intercept and slopes for subjects and for items) assuming a log-normal likelihood.

Maximal model
# "maximal" model: log-normal likelihood, correlated varying intercepts
# and slopes for both subjects and items
# (priors are on the log scale because of the lognormal likelihood)
fit_df_gg05_rc <-
  brm(
    RT ~ condition + (condition |
                     subj) + (condition | item),
    family = lognormal(),
    prior =
      c(
        prior(normal(6, 1.5), class = Intercept),
        prior(normal(0, .1), class = b),
        prior(normal(0, 1), class = sigma),
        prior(normal(0, 1), class = sd),
        prior(lkj(2), class = cor)
      ),
    data = df_gg05_rc
  )

# print the model summary
fit_df_gg05_rc
 Family: lognormal 
  Links: mu = identity; sigma = identity 
Formula: RT ~ condition + (condition | subj) + (condition | item) 
   Data: df_gg05_rc (Number of observations: 672) 
  Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
         total post-warmup draws = 4000

Group-Level Effects: 
~item (Number of levels: 16) 
                          Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
sd(Intercept)                 0.04      0.02     0.00     0.09 1.00     1184
sd(condition1)                0.09      0.05     0.01     0.19 1.00     1392
cor(Intercept,condition1)     0.29      0.40    -0.58     0.89 1.00     2042
                          Tail_ESS
sd(Intercept)                 1419
sd(condition1)                1654
cor(Intercept,condition1)     2504

~subj (Number of levels: 42) 
                          Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
sd(Intercept)                 0.32      0.04     0.26     0.41 1.00      795
sd(condition1)                0.23      0.04     0.15     0.32 1.00     1916
cor(Intercept,condition1)     0.50      0.16     0.16     0.79 1.00     2003
                          Tail_ESS
sd(Intercept)                 1361
sd(condition1)                2812
cor(Intercept,condition1)     2517

Population-Level Effects: 
           Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept      5.87      0.05     5.77     5.98 1.00      584     1182
condition1     0.10      0.05     0.00     0.18 1.00     1976     2487

Family Specific Parameters: 
      Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma     0.36      0.01     0.34     0.38 1.00     3617     2564

Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
  1. Examine the effect of relative clause attachment site (the predictor c_cond) on reading times RT (\(\beta\)).
My solution
  • the effect of RC attachment on RT on the log scale is
posterior_summary(fit_df_gg05_rc, variable = "b_condition1")
               Estimate  Est.Error        Q2.5     Q97.5
b_condition1 0.09707934 0.04563313 0.003938304 0.1834777
  1. Estimate the median difference between relative clause attachment sites in milliseconds, and report the mean and 95% CI.
My solution
# extract posterior draws of the intercept (alpha) and slope (beta), log scale
alpha <- as_draws_df(fit_df_gg05_rc)$b_Intercept
beta <- as_draws_df(fit_df_gg05_rc)$b_condition1
# Difference between object RC (coded +.5) and subject RC (coded -.5),
# back-transformed to milliseconds draw by draw
effect <- exp(alpha + beta * .5) - exp(alpha + beta * -.5)
# mean and 95% credible interval of the difference in ms
c(mean = mean(effect), quantile(effect, c(.025,.975)))
     mean      2.5%     97.5% 
34.820055  1.508776 67.937606 
  1. Do a sensitivity analysis. What is the estimate of the effect (\(\beta\)) under different priors? What is the difference in milliseconds between conditions under different priors?
My solution
  • first let’s check out the fit of the model
pp_check(fit_df_gg05_rc, ndraws = 50, type = "dens_overlay")

  • now by participants
# posterior predictive density overlays, faceted by participant
ppc_dens_overlay_grouped(df_gg05_rc$RT,
  yrep =
    posterior_predict(fit_df_gg05_rc,
      ndraws = 100
    ),
  group = df_gg05_rc$subj
) +
  # FIX: axis label was copy-pasted from an EEG (N400) example;
  # the observed variable here is self-paced reading time
  xlab("Reading time (ms)")

  • and by sd
# posterior predictive check of the by-subject standard deviations
pp_check(fit_df_gg05_rc,
  type = "stat_grouped",
  ndraws = 1000,
  group = "subj",
  stat = "sd"
)

posterior_summary(fit_df_gg05_rc, variable = "b_condition1")
               Estimate  Est.Error        Q2.5     Q97.5
b_condition1 0.09707934 0.04563313 0.003938304 0.1834777
posterior_summary(fit_df_gg05_rc, variable = "b_Intercept")
            Estimate Est.Error     Q2.5    Q97.5
b_Intercept 5.872797 0.0524547 5.767769 5.979495
  • our posteriors are pretty close to our priors
brms::prior_summary(fit_df_gg05_rc)
                prior     class       coef group resp dpar nlpar lb ub
       normal(0, 0.1)         b                                       
       normal(0, 0.1)         b condition1                            
       normal(6, 1.5) Intercept                                       
 lkj_corr_cholesky(2)         L                                       
 lkj_corr_cholesky(2)         L             item                      
 lkj_corr_cholesky(2)         L             subj                      
         normal(0, 1)        sd                                   0   
         normal(0, 1)        sd             item                  0   
         normal(0, 1)        sd condition1  item                  0   
         normal(0, 1)        sd  Intercept  item                  0   
         normal(0, 1)        sd             subj                  0   
         normal(0, 1)        sd condition1  subj                  0   
         normal(0, 1)        sd  Intercept  subj                  0   
         normal(0, 1)     sigma                                   0   
       source
         user
 (vectorized)
         user
         user
 (vectorized)
 (vectorized)
         user
 (vectorized)
 (vectorized)
 (vectorized)
 (vectorized)
 (vectorized)
 (vectorized)
         user
  • let’s see if this is due to the influence of our priors. Let’s start with narrower prior

\[ \beta \sim Normal(0,.01) \]

# sensitivity analysis: refit with a much tighter slope prior, Normal(0, .01)
fit_df_gg05_rc_tight <-
  brm(
    RT ~ condition + (condition |
                     subj) + (condition | item),
    family = lognormal(),
    prior =
      c(
        prior(normal(6, 1.5), class = Intercept),
        prior(normal(0, .01), class = b),
        prior(normal(0, 1), class = sigma),
        prior(normal(0, 1), class = sd),
        prior(lkj(2), class = cor)
      ),
    data = df_gg05_rc
  )

# print the model summary
fit_df_gg05_rc_tight
 Family: lognormal 
  Links: mu = identity; sigma = identity 
Formula: RT ~ condition + (condition | subj) + (condition | item) 
   Data: df_gg05_rc (Number of observations: 672) 
  Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
         total post-warmup draws = 4000

Group-Level Effects: 
~item (Number of levels: 16) 
                          Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
sd(Intercept)                 0.04      0.02     0.00     0.09 1.00     1422
sd(condition1)                0.11      0.05     0.01     0.22 1.00     1005
cor(Intercept,condition1)     0.29      0.40    -0.59     0.89 1.00     1643
                          Tail_ESS
sd(Intercept)                 1414
sd(condition1)                1088
cor(Intercept,condition1)     2629

~subj (Number of levels: 42) 
                          Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
sd(Intercept)                 0.33      0.04     0.26     0.42 1.01      723
sd(condition1)                0.24      0.04     0.16     0.34 1.00     1568
cor(Intercept,condition1)     0.51      0.17     0.14     0.79 1.00     1549
                          Tail_ESS
sd(Intercept)                 1356
sd(condition1)                2299
cor(Intercept,condition1)     2410

Population-Level Effects: 
           Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept      5.84      0.05     5.74     5.94 1.01      560      827
condition1     0.00      0.01    -0.02     0.02 1.00     5157     3093

Family Specific Parameters: 
      Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma     0.36      0.01     0.34     0.39 1.00     3152     2867

Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
posterior_summary(fit_df_gg05_rc_tight, variable = "b_condition1")
                Estimate  Est.Error        Q2.5    Q97.5
b_condition1 0.004108028 0.01000705 -0.01560103 0.023613
  • and now a wider prior

\[ \beta \sim Normal(0,1) \]

# sensitivity analysis: refit with a much wider slope prior, Normal(0, 1)
fit_df_gg05_rc_wide <-
  brm(
    RT ~ condition + (condition |
                     subj) + (condition | item),
    family = lognormal(),
    prior =
      c(
        prior(normal(6, 1.5), class = Intercept),
        prior(normal(0, 1), class = b),
        prior(normal(0, 1), class = sigma),
        prior(normal(0, 1), class = sd),
        prior(lkj(2), class = cor)
      ),
    data = df_gg05_rc
  )

# print the model summary
fit_df_gg05_rc_wide
 Family: lognormal 
  Links: mu = identity; sigma = identity 
Formula: RT ~ condition + (condition | subj) + (condition | item) 
   Data: df_gg05_rc (Number of observations: 672) 
  Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
         total post-warmup draws = 4000

Group-Level Effects: 
~item (Number of levels: 16) 
                          Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
sd(Intercept)                 0.04      0.02     0.00     0.09 1.00     1280
sd(condition1)                0.09      0.05     0.01     0.19 1.01     1083
cor(Intercept,condition1)     0.29      0.41    -0.62     0.90 1.00     1852
                          Tail_ESS
sd(Intercept)                 1480
sd(condition1)                 861
cor(Intercept,condition1)     2105

~subj (Number of levels: 42) 
                          Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
sd(Intercept)                 0.33      0.04     0.25     0.42 1.00     1060
sd(condition1)                0.23      0.04     0.15     0.32 1.00     2041
cor(Intercept,condition1)     0.50      0.17     0.15     0.79 1.01     2335
                          Tail_ESS
sd(Intercept)                 1636
sd(condition1)                2722
cor(Intercept,condition1)     2704

Population-Level Effects: 
           Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept      5.88      0.05     5.78     5.99 1.00      593     1241
condition1     0.12      0.05     0.02     0.22 1.00     2099     3144

Family Specific Parameters: 
      Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma     0.36      0.01     0.34     0.38 1.00     3745     2791

Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
posterior_summary(fit_df_gg05_rc_wide, variable = "b_condition1")
              Estimate  Est.Error       Q2.5     Q97.5
b_condition1 0.1221044 0.05088979 0.02370349 0.2192081
# Summarise b_condition1 from each of the three fits and tag every row
# with the slope prior that produced it, then stack for comparison.
prior <- posterior_summary(fit_df_gg05_rc, variable = "b_condition1") %>%
  data.frame() %>%
  mutate("Prior for $\\beta$" = "Normal(0,.1)")
tight <- posterior_summary(fit_df_gg05_rc_tight, variable = "b_condition1") %>%
  data.frame() %>%
  mutate("Prior for $\\beta$" = "Normal(0,.01)")
wide <- posterior_summary(fit_df_gg05_rc_wide, variable = "b_condition1") %>%
  data.frame() %>%
  mutate("Prior for $\\beta$" = "Normal(0,1)")

sens_priors <- rbind(prior, tight, wide)

brms::prior_summary(fit_df_gg05_rc_wide)
                prior     class       coef group resp dpar nlpar lb ub
         normal(0, 1)         b                                       
         normal(0, 1)         b condition1                            
       normal(6, 1.5) Intercept                                       
 lkj_corr_cholesky(2)         L                                       
 lkj_corr_cholesky(2)         L             item                      
 lkj_corr_cholesky(2)         L             subj                      
         normal(0, 1)        sd                                   0   
         normal(0, 1)        sd             item                  0   
         normal(0, 1)        sd condition1  item                  0   
         normal(0, 1)        sd  Intercept  item                  0   
         normal(0, 1)        sd             subj                  0   
         normal(0, 1)        sd condition1  subj                  0   
         normal(0, 1)        sd  Intercept  subj                  0   
         normal(0, 1)     sigma                                   0   
       source
         user
 (vectorized)
         user
         user
 (vectorized)
 (vectorized)
         user
 (vectorized)
 (vectorized)
 (vectorized)
 (vectorized)
 (vectorized)
 (vectorized)
         user
# Render the prior-sensitivity table
kable_styling(kbl(sens_priors))
Estimate Est.Error Q2.5 Q97.5 Prior for $\beta$
b_condition1 0.0970793 0.0456331 0.0039383 0.1834777 Normal(0,.1)
b_condition11 0.0041080 0.0100070 -0.0156010 0.0236130 Normal(0,.01)
b_condition12 0.1221044 0.0508898 0.0237035 0.2192081 Normal(0,1)
  • and now in milliseconds
# Helper: back-transform a fit's condition effect from the log scale to
# milliseconds and summarise it (posterior mean and 95% CrI), labelled with
# the slope prior used. Replaces three copy-pasted blocks that depended on
# fragile auto-generated quantile column names ("quantile.diff1..0.025.").
summarise_ms <- function(fit, prior_label) {
  draws <- as_draws_df(fit)
  # difference between the two conditions at +/- half the effect
  diff_ms <- exp(draws$b_Intercept + draws$b_condition1 / 2) -
    exp(draws$b_Intercept - draws$b_condition1 / 2)
  data.frame(
    "Estimate (ms)" = mean(diff_ms),
    "2.5%" = unname(quantile(diff_ms, .025)),
    "97.5%" = unname(quantile(diff_ms, .975)),
    "Prior for $\\beta$" = prior_label,
    check.names = FALSE
  )
}

prior_ms <- summarise_ms(fit_df_gg05_rc, "Normal(0,.1)")
tight_ms <- summarise_ms(fit_df_gg05_rc_tight, "Normal(0,.01)")
wide_ms <- summarise_ms(fit_df_gg05_rc_wide, "Normal(0,1)")

priors_ms <- rbind(prior_ms, tight_ms, wide_ms) %>%
  mutate(effect = "b_condition1") %>%
  relocate(effect, .before = "Estimate (ms)")

priors_ms %>%
  kbl() %>%
  kable_styling()
effect Estimate (ms) 2.5% 97.5% Prior for $\beta$
b_condition1 34.820055 1.508776 67.937606 Normal(0,.1)
b_condition1 1.428914 -5.311052 8.195846 Normal(0,.01)
b_condition1 44.200417 8.198927 81.433255 Normal(0,1)
  • we see lots of variation in estimates as a function of our priors
    • with the tight prior the posterior is pulled strongly toward zero and its credible interval is much narrower — less uncertainty, but the estimate is dominated by the prior
    • with the regularised and wider priors the effects are a bit more similar
# pp_check(type = "stat") plots a test statistic (by default the mean) computed
# over posterior-predictive draws against the observed statistic, so it
# summarises overall distributional fit rather than the condition effect.
# NOTE(review): to compare the slope across priors, plot the b_condition1
# posteriors directly (e.g. mcmc_plot(fit, variable = "b_condition1")).
ggpubr::ggarrange(
  pp_check(fit_df_gg05_rc_wide, type = "stat") + theme_bw() + ggtitle("Wide priors N(0,1)"),
  pp_check(fit_df_gg05_rc_tight, type = "stat") + theme_bw() + ggtitle("Tight priors N(0,.01)"),
  pp_check(fit_df_gg05_rc, type = "stat") + theme_bw() + ggtitle("Original priors N(0,.1)"),
nrow = 3
)

2.3 Exercise 5.3 Relative clause processing in Mandarin Chinese

Load the following two data sets:

data("df_gibsonwu")
data("df_gibsonwu2")

The data are taken from two experiments that investigate (inter alia) the effect of relative clause type on reading time in Chinese. The data are from Gibson and Wu (2013) and Vasishth et al. (2013) respectively. The second data set is a direct replication attempt of the Gibson and Wu (2013) experiment.

Chinese relative clauses are interesting theoretically because they are prenominal: the relative clause appears before the head noun. For example, the English relative clauses shown above would appear in the following order in Mandarin. The square brackets mark the relative clause, and REL refers to the Chinese equivalent of the English relative pronoun who.

(2a) [The photographer sent to the editor] REL the reporter was hoping for a good story. (ORC)

(2b) [sent the photographer to the editor] REL the reporter who was hoping for a good story. (SRC)

As discussed in Gibson and Wu (2013), the consequence of Chinese relative clauses being prenominal is that the distance between the verb in relative clause and the head noun is larger in subject relatives than object relatives. Hsiao and Gibson (2003) were the first to suggest that the larger distance in subject relatives leads to longer reading time at the head noun. Under this view, the prediction is that subject relatives are harder to process than object relatives. If this is true, this is interesting and surprising because in most other languages that have been studied, subject relatives are easier to process than object relatives; so Chinese will be a very unusual exception cross-linguistically.

The data provided are for the critical region (the head noun; here, reporter). The experiment method is self-paced reading, so we have reading times in milliseconds. The second data set is a direct replication attempt of the first data set, which is from Gibson and Wu (2013).

The research hypothesis is whether the difference in reading times between object and subject relative clauses is negative. For the first data set (df_gibsonwu), investigate this question by fitting two “maximal” hierarchical models (correlated varying intercept and slopes for subjects and items). The dependent variable in both models is the raw reading time in milliseconds. The first model should use the normal likelihood in the model; the second model should use the log-normal likelihood. In both models, use \(\pm 0.5\) sum coding to model the effect of relative clause type. You will need to decide on appropriate priors for the various parameters.

Contrasts
  • obj-ext = 0.5, subj-ext = -0.5 (matching the contrasts assigned below, so the slope estimates the object-minus-subject difference)
head(df_gibsonwu)
    subj item     type   rt
94     1   13  obj-ext 1561
221    1    6 subj-ext  959
341    1    5  obj-ext  582
461    1    9  obj-ext  294
621    1   14 subj-ext  438
753    1    4 subj-ext  286
# Sum-code relative-clause type: obj-ext = +0.5, subj-ext = -0.5, so the
# slope estimates the object-minus-subject reading-time difference.
df_gibsonwu$type <- factor(df_gibsonwu$type,
                           levels = c("obj-ext", "subj-ext"))
contrasts(df_gibsonwu$type) <- c(0.5, -0.5)
contrasts(df_gibsonwu$type)  # confirm the coding
         [,1]
obj-ext   0.5
subj-ext -0.5
Normal likelihood
  • first set priors
# Priors for the raw-RT (Gaussian) model, on the millisecond scale.
priors_gw_norm <- c(
  set_prior("normal(500, 150)", class = "Intercept"),
  set_prior("normal(0,500)",    class = "b", coef = "type1"),
  set_prior("normal(0,500)",    class = "sd"),
  set_prior("normal(0,1000)",   class = "sigma"),
  set_prior("lkj(2)",           class = "cor")
)
  • fit model
# Maximal model (correlated varying intercepts/slopes for subjects and items)
# with a Gaussian likelihood on raw reading times.
fit_gw_norm <- brm(
  rt ~ 1 + type + (1 + type | subj) + (1 + type | item),
  data = df_gibsonwu,
  family = gaussian(),
  prior = priors_gw_norm
)
Log-normal likelihood
  • set priors
# Priors for the log-normal model, on the log-RT scale.
priors_gw_log <- c(
  set_prior("normal(6, 1.5)", class = "Intercept"),
  set_prior("normal(0,1)",    class = "b", coef = "type1"),
  set_prior("normal(0,1)",    class = "sd"),
  set_prior("normal(0,1)",    class = "sigma"),
  set_prior("lkj(2)",         class = "cor")
)
  • fit model
# Same maximal random-effects structure, now with a log-normal likelihood.
fit_gw_log <- brm(
  rt ~ 1 + type + (1 + type | subj) + (1 + type | item),
  data = df_gibsonwu,
  family = lognormal(),
  prior = priors_gw_log
)
  1. Plot the posterior predictive distributions from the two models. What is the difference in the posterior predictive distributions of the two models; and why is there a difference?
Posterior predictive distributions
# Posterior predictive checks for the two likelihoods, stacked vertically.
# The legend is suppressed in both panels and extracted once (via cowplot)
# as a third, short row so the two plots share it.
ggpubr::ggarrange(
  pp_check(fit_gw_norm, ndraws = 1000) +
    ggtitle("Normal distr") +
    theme(legend.position = "none"),
  pp_check(fit_gw_log, ndraws = 1000) + 
    ggtitle("Log-normal distr") +
    theme(legend.position = "none"),
  cowplot::get_legend(pp_check(fit_gw_log) +
                        theme(legend.position = "bottom")),
  ncol = 1,
  heights = c(.45,.45,.1)
)

  • quite a mismatch in normal likelihood model, e.g., negative values are generated
  • log-normal better fit
  1. Examine the posterior distributions of the effect estimates (in milliseconds) in the two models. Why are these different?
Posterior distribution
# Trace and marginal-posterior plots for all parameters of each fit
# Normal distr
plot(fit_gw_norm)

# Log distr
plot(fit_gw_log)

Effect estimates
  • look at effect estimates
# Gaussian model: the slope is already on the millisecond scale, so the
# object-minus-subject effect is simply b_type1.
draws_norm <- as_draws_df(fit_gw_norm)
gw_intercept_norm <- draws_norm$b_Intercept
gw_slope_norm <- draws_norm$b_type1

gw_RT_diff_norm <- gw_slope_norm
round(quantile(gw_RT_diff_norm, prob = c(0.025, 0.975)), 2)
   2.5%   97.5% 
-247.39   23.56 
# Log-normal model: back-transform to milliseconds by comparing the two
# conditions at +/- half the effect on the log scale.
draws_log <- as_draws_df(fit_gw_log)
gw_intercept_log <- draws_log$b_Intercept
gw_slope_log <- draws_log$b_type1

gw_RT_diff_log <- exp(gw_intercept_log + gw_slope_log / 2) -
  exp(gw_intercept_log - gw_slope_log / 2)
quantile(gw_RT_diff_log, prob = c(0.025, 0.975))
     2.5%     97.5% 
-78.05620  16.21854 
boxplot(rt~type,df_gibsonwu)

  • 95% credible interval for estimates include 0 for both norm and log-norm distributions
  • log-norm model CrI is tighter, and includes smaller effect size (but includes 0)
  1. Given the posterior predictive distributions you plotted above, why is the log-normal likelihood model better for carrying out inference and hypothesis testing?
Tip
# look at range per type: extreme values in one condition motivate the
# log-normal likelihood
df_gibsonwu %>%
  summarise(min(rt), max(rt), .by = type)
# A tibble: 2 × 3
  type     `min(rt)` `max(rt)`
  <fct>        <int>     <int>
1 obj-ext        172      2308
2 subj-ext       189      6217
library(ggrain)

# Raincloud plot of raw reading times per relative-clause type.
df_gibsonwu %>%
  ggplot(aes(x = type, y = rt,
             fill = type, color = type, shape = type)) +
  labs(title = "Distribution of observations") +
  geom_rain(alpha = .5, rain.side = 'f1x1',
            violin.args = list(color = NA, alpha = .5)) +
  scale_fill_manual(values = c("dodgerblue", "darkorange")) +
  scale_color_manual(values = c("dodgerblue", "darkorange")) +
  theme_bw()

My answer

  • subj-ext had extreme raw values; this would’ve biased the model with a normal distribution

Next, work out a normal approximation of the log-normal model’s posterior distribution for the relative clause effect that you obtained from the above data analysis. Then use that normal approximation as an informative prior for the slope parameter when fitting a hierarchical model to the second data set. This is an example of incrementally building up knowledge by successively using a previous study’s posterior as a prior for the next study; this is essentially equivalent to pooling both data sets (check that pooling the data and using a Normal(0,1) prior for the effect of interest, with a log-normal likelihood, gives you approximately the same posterior as the informative-prior model fit above).

Dataset 1 log estimates
  • first check 2nd dataset
head(df_gibsonwu2)
   subj item condition pos   rt    region
9   1m1   15   obj-ext   8  832 head noun
20  1m1    8  subj-ext   8 2131 head noun
33  1m1   11   obj-ext   8  553 head noun
46  1m1   10  subj-ext   8 1091 head noun
62  1m1   16  subj-ext   8  598 head noun
75  1m1   14  subj-ext   8  645 head noun
# Align dataset 2 with dataset 1: rename 'condition' to 'type' and apply the
# identical +/-0.5 sum coding (obj-ext = +0.5, subj-ext = -0.5).
df_gibsonwu2 <- rename(df_gibsonwu2, "type" = condition)
df_gibsonwu2$type <- factor(df_gibsonwu2$type,
                            levels = c("obj-ext", "subj-ext"))
contrasts(df_gibsonwu2$type) <- c(0.5, -0.5)
contrasts(df_gibsonwu2$type)  # confirm the coding
         [,1]
obj-ext   0.5
subj-ext -0.5
  • remind ourselves of the estimates from the first dataset
summary(fit_gw_log)
 Family: lognormal 
  Links: mu = identity; sigma = identity 
Formula: rt ~ 1 + type + (1 + type | subj) + (1 + type | item) 
   Data: df_gibsonwu (Number of observations: 547) 
  Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
         total post-warmup draws = 4000

Group-Level Effects: 
~item (Number of levels: 15) 
                     Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
sd(Intercept)            0.20      0.05     0.13     0.33 1.00     1183
sd(type1)                0.07      0.05     0.00     0.20 1.00     1569
cor(Intercept,type1)    -0.00      0.42    -0.77     0.78 1.00     3868
                     Tail_ESS
sd(Intercept)            2054
sd(type1)                1816
cor(Intercept,type1)     2774

~subj (Number of levels: 37) 
                     Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
sd(Intercept)            0.25      0.04     0.18     0.34 1.00     1247
sd(type1)                0.14      0.07     0.02     0.28 1.00     1142
cor(Intercept,type1)    -0.46      0.30    -0.90     0.23 1.00     2456
                     Tail_ESS
sd(Intercept)            2073
sd(type1)                1084
cor(Intercept,type1)     2525

Population-Level Effects: 
          Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept     6.06      0.07     5.92     6.20 1.00      982     1652
type1        -0.07      0.05    -0.18     0.04 1.00     2852     2661

Family Specific Parameters: 
      Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma     0.51      0.02     0.48     0.55 1.00     3433     3015

Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
  • for the replication study, use normal approximations of the dataset-1 posterior (on the log scale):
    • type1: \(Normal(-0.07, 0.05)\) (posterior Estimate and Est.Error)
    • Intercept: \(Normal(6.06, 0.07)\)

Dataset 2 model

# Informative priors for the replication, taken as normal approximations of
# the dataset-1 posterior (summary(fit_gw_log): type1 Estimate -0.07,
# Est.Error 0.05; Intercept 6.06, Est.Error 0.07).
# BUG FIX: the original slope prior was normal(-0.7, 0.06) -- ten times the
# posterior mean. That typo all but forces a large negative effect (compare
# the type1 = -0.62 estimate in the rendered output below); the model should
# be refit with the corrected prior.
priors_gw2_log <- c(
  set_prior("normal(6, 0.07)", class = "Intercept"),
  set_prior("normal(-0.07, 0.05)", class = "b", coef = "type1"),
  set_prior("normal(0,1)", class = "sd"),
  set_prior("normal(0,1)", class = "sigma"),
  set_prior("lkj(2)", class = "cor")
)
  • fit model; convergence warning (ESS too low)
# Fit the replication data with the informative priors.
fit_gw2_log <- brm(
  rt ~ 1 + type + (1 + type | subj) + (1 + type | item),
  data = df_gibsonwu2,
  family = lognormal(),
  prior = priors_gw2_log
)
summary(fit_gw2_log)
 Family: lognormal 
  Links: mu = identity; sigma = identity 
Formula: rt ~ 1 + type + (1 + type | subj) + (1 + type | item) 
   Data: df_gibsonwu2 (Number of observations: 595) 
  Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
         total post-warmup draws = 4000

Group-Level Effects: 
~item (Number of levels: 15) 
                     Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
sd(Intercept)            0.17      0.04     0.10     0.27 1.00     1247
sd(type1)                0.61      0.15     0.37     0.95 1.00     1252
cor(Intercept,type1)    -0.28      0.32    -0.79     0.40 1.00      524
                     Tail_ESS
sd(Intercept)            2247
sd(type1)                1604
cor(Intercept,type1)     1206

~subj (Number of levels: 40) 
                     Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
sd(Intercept)            0.24      0.04     0.18     0.32 1.00     1221
sd(type1)                0.11      0.07     0.01     0.24 1.00      931
cor(Intercept,type1)     0.03      0.34    -0.65     0.72 1.00     3762
                     Tail_ESS
sd(Intercept)            2346
sd(type1)                1521
cor(Intercept,type1)     2462

Population-Level Effects: 
          Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept     6.02      0.05     5.92     6.13 1.00      963     1489
type1        -0.62      0.06    -0.74    -0.49 1.00     3196     2232

Family Specific Parameters: 
      Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma     0.43      0.01     0.40     0.46 1.00     3846     2923

Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
plot(fit_gw2_log)

Estimates
# Back-transform the replication effect to milliseconds.
draws_gw2 <- as_draws_df(fit_gw2_log)
gw2_intercept_log <- draws_gw2$b_Intercept
gw2_slope_log <- draws_gw2$b_type1

gw2_RT_diff_log <- exp(gw2_intercept_log + gw2_slope_log / 2) -
  exp(gw2_intercept_log - gw2_slope_log / 2)
quantile(gw2_RT_diff_log, prob = c(0.025, 0.975))
     2.5%     97.5% 
-320.3854 -198.1758 
Datasets 1 and 2
# make sure 2 datasets have same column names/order
names(df_gibsonwu); names(df_gibsonwu2)
[1] "subj" "item" "type" "rt"  
[1] "subj"   "item"   "type"   "pos"    "rt"     "region"
# keep only the shared columns, and suffix subj/item IDs so that participants
# and items from the two experiments are never accidentally pooled as the same
# grouping levels in the combined model
df_gibsonwu2 <- df_gibsonwu2 %>%
  select(subj,item,type,rt) %>%
  mutate(subj = paste0(subj,"_gw2"),
         item = paste0(item,"_gw2"))

df_gibsonwu <- df_gibsonwu %>%
  mutate(subj = paste0(subj,"_gw1"),
         item = paste0(item,"_gw1"))

# sanity check: identical column names in identical order
names(df_gibsonwu) == names(df_gibsonwu2)
[1] TRUE TRUE TRUE TRUE
# stack the two experiments into one data frame
df_gw12 <- rbind(df_gibsonwu,df_gibsonwu2)
Informative model
# contrasts
head(df_gw12)
     subj   item     type   rt
94  1_gw1 13_gw1  obj-ext 1561
221 1_gw1  6_gw1 subj-ext  959
341 1_gw1  5_gw1  obj-ext  582
461 1_gw1  9_gw1  obj-ext  294
621 1_gw1 14_gw1 subj-ext  438
753 1_gw1  4_gw1 subj-ext  286
# Apply the same +/-0.5 sum coding to the pooled data:
# obj-ext = +0.5, subj-ext = -0.5.
df_gw12$type <- factor(df_gw12$type,
                       levels = c("obj-ext", "subj-ext"))
contrasts(df_gw12$type) <- c(0.5, -0.5)
contrasts(df_gw12$type)  # confirm the coding
         [,1]
obj-ext   0.5
subj-ext -0.5
  • same priors as based on previous models
# Informative priors for the pooled data, same normal approximation of the
# dataset-1 posterior as above (type1: Estimate -0.07, Est.Error 0.05).
# BUG FIX: as in priors_gw2_log, the slope prior was mistyped as
# normal(-0.7, 0.06) -- ten times the posterior mean -- which drives the
# strongly negative type1 estimate in the rendered output below; the model
# should be refit with the corrected prior.
priors_gw12_log <- c(
  set_prior("normal(6, 0.07)", class = "Intercept"),
  set_prior("normal(-0.07, 0.05)", class = "b", coef = "type1"),
  set_prior("normal(0,1)", class = "sd"),
  set_prior("normal(0,1)", class = "sigma"),
  set_prior("lkj(2)", class = "cor")
)
# Fit the pooled (both experiments) data with the informative priors.
fit_gw12_log <- brm(
  rt ~ 1 + type + (1 + type | subj) + (1 + type | item),
  data = df_gw12,
  family = lognormal(),
  prior = priors_gw12_log
)
summary(fit_gw12_log)
 Family: lognormal 
  Links: mu = identity; sigma = identity 
Formula: rt ~ 1 + type + (1 + type | subj) + (1 + type | item) 
   Data: df_gw12 (Number of observations: 1142) 
  Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
         total post-warmup draws = 4000

Group-Level Effects: 
~item (Number of levels: 30) 
                     Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
sd(Intercept)            0.18      0.03     0.13     0.24 1.00     1514
sd(type1)                0.44      0.10     0.25     0.66 1.01      751
cor(Intercept,type1)    -0.15      0.28    -0.66     0.39 1.01      434
                     Tail_ESS
sd(Intercept)            2524
sd(type1)                1537
cor(Intercept,type1)      922

~subj (Number of levels: 77) 
                     Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
sd(Intercept)            0.24      0.03     0.20     0.30 1.00     1524
sd(type1)                0.12      0.05     0.02     0.22 1.00      881
cor(Intercept,type1)    -0.34      0.27    -0.81     0.26 1.00     2971
                     Tail_ESS
sd(Intercept)            2060
sd(type1)                1251
cor(Intercept,type1)     2628

Population-Level Effects: 
          Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept     6.05      0.05     5.95     6.14 1.01      771     1433
type1        -0.49      0.07    -0.63    -0.34 1.00     1267     2130

Family Specific Parameters: 
      Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma     0.47      0.01     0.45     0.49 1.00     4625     2905

Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
plot(fit_gw12_log)

Estimates
# Back-transform the pooled-model effect to milliseconds.
draws_gw12 <- as_draws_df(fit_gw12_log)
gw12_intercept_log <- draws_gw12$b_Intercept
gw12_slope_log <- draws_gw12$b_type1

gw12_RT_diff_log <- exp(gw12_intercept_log + gw12_slope_log / 2) -
  exp(gw12_intercept_log - gw12_slope_log / 2)
quantile(gw12_RT_diff_log, prob = c(0.025, 0.975))
     2.5%     97.5% 
-274.9347 -144.8008 
  • negative slope, CrI doesn’t include 0

Regularised prior?

# Weakly regularising priors (slope centred on zero) for the pooled data,
# to compare against the informative-prior fit.
priors_gw12_log_reg <- c(
  set_prior("normal(6, 1.5)", class = "Intercept"),
  set_prior("normal(0,1)",    class = "b", coef = "type1"),
  set_prior("normal(0,1)",    class = "sd"),
  set_prior("normal(0,1)",    class = "sigma"),
  set_prior("lkj(2)",         class = "cor")
)
# Fit the pooled data with the regularising priors.
fit_gw12_log_reg <- brm(
  rt ~ 1 + type + (1 + type | subj) + (1 + type | item),
  data = df_gw12,
  family = lognormal(),
  prior = priors_gw12_log_reg
)
summary(fit_gw12_log_reg)
 Family: lognormal 
  Links: mu = identity; sigma = identity 
Formula: rt ~ 1 + type + (1 + type | subj) + (1 + type | item) 
   Data: df_gw12 (Number of observations: 1142) 
  Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
         total post-warmup draws = 4000

Group-Level Effects: 
~item (Number of levels: 30) 
                     Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
sd(Intercept)            0.17      0.03     0.13     0.24 1.00     1516
sd(type1)                0.11      0.05     0.01     0.20 1.00      972
cor(Intercept,type1)    -0.28      0.31    -0.80     0.37 1.00     3205
                     Tail_ESS
sd(Intercept)            2133
sd(type1)                1099
cor(Intercept,type1)     1848

~subj (Number of levels: 77) 
                     Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
sd(Intercept)            0.25      0.03     0.20     0.30 1.00     1415
sd(type1)                0.11      0.05     0.01     0.20 1.00     1150
cor(Intercept,type1)    -0.34      0.30    -0.85     0.33 1.00     3502
                     Tail_ESS
sd(Intercept)            2205
sd(type1)                1421
cor(Intercept,type1)     2562

Population-Level Effects: 
          Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept     6.03      0.04     5.95     6.12 1.00     1690     2214
type1        -0.08      0.04    -0.15    -0.01 1.00     3981     3214

Family Specific Parameters: 
      Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma     0.47      0.01     0.45     0.49 1.00     4353     2865

Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
prior_summary(fit_gw12_log_reg)
                prior     class      coef group resp dpar nlpar lb ub
               (flat)         b                                      
          normal(0,1)         b     type1                            
       normal(6, 1.5) Intercept                                      
 lkj_corr_cholesky(2)         L                                      
 lkj_corr_cholesky(2)         L            item                      
 lkj_corr_cholesky(2)         L            subj                      
          normal(0,1)        sd                                  0   
          normal(0,1)        sd            item                  0   
          normal(0,1)        sd Intercept  item                  0   
          normal(0,1)        sd     type1  item                  0   
          normal(0,1)        sd            subj                  0   
          normal(0,1)        sd Intercept  subj                  0   
          normal(0,1)        sd     type1  subj                  0   
          normal(0,1)     sigma                                  0   
       source
      default
         user
         user
         user
 (vectorized)
 (vectorized)
         user
 (vectorized)
 (vectorized)
 (vectorized)
 (vectorized)
 (vectorized)
 (vectorized)
         user
plot(fit_gw12_log_reg)

Estimates
# Back-transform the regularised-prior effect to milliseconds.
draws_reg <- as_draws_df(fit_gw12_log_reg)
gw12_intercept_log_reg <- draws_reg$b_Intercept
gw12_slope_log_reg <- draws_reg$b_type1

gw12_RT_diff_log_reg <- exp(gw12_intercept_log_reg + gw12_slope_log_reg / 2) -
  exp(gw12_intercept_log_reg - gw12_slope_log_reg / 2)
quantile(gw12_RT_diff_log_reg, prob = c(0.025, 0.975))
      2.5%      97.5% 
-66.127270  -3.048263 
  • with the regularising prior the 95% CrI still excludes 0, but the interval is much tighter and centred on smaller (absolute) effect sizes than under the informative prior

2.4 Exercise 5.4 - Agreement attraction in comprehension

Load the following data:

# Dillon et al. Experiment 1: total reading times at the critical region
data("df_dillonE1")
dillonE1 <- df_dillonE1
head(dillonE1)
        subj       item   rt int     expt
49 dillonE11 dillonE119 2918 low dillonE1
56 dillonE11 dillonE119 1338 low dillonE1
63 dillonE11 dillonE119  424 low dillonE1
70 dillonE11 dillonE119  186 low dillonE1
77 dillonE11 dillonE119  195 low dillonE1
84 dillonE11 dillonE119 1218 low dillonE1

The data are taken from an experiment that investigates (inter alia) the effect of number similarity between a noun and the auxiliary verb in sentences like the following. There are two levels to a factor called Int(erference): low and high.

(3a) low: The key to the cabinet are on the table (3b) high: The key to the cabinets are on the table

Here, in (3b), the auxiliary verb are is predicted to be read faster than in (3a), because the plural marking on the noun cabinets leads the reader to think that the sentence is grammatical. (Both sentences are ungrammatical.) This phenomenon, where the high condition is read faster than the low condition, is called agreement attraction.

The data provided are for the critical region (the auxiliary verb are). The experiment method is eye-tracking; we have total reading times in milliseconds.

The research question is whether the difference in reading times between high and low conditions is negative.

  • First, using a log-normal likelihood, fit a hierarchical model with correlated varying intercept and slopes for subjects and items. You will need to decide on the priors for the model.
  • By simply looking at the posterior distribution of the slope parameter, \(\beta\), what would you conclude about the theoretical claim relating to agreement attraction?

2.5 Exercise 5.5

2.6 Exercise 5.6

2.7 Exercise 5.7

2.8 Exercise 5.8

2.9 Quarto

Quarto enables you to weave together content and executable code into a finished presentation. To learn more about Quarto presentations see https://quarto.org/docs/presentations/.

2.10 Bullets

When you click the Render button a document will be generated that includes:

  • Content authored with markdown
  • Output from executable code

2.11 Code

When you click the Render button a presentation will be generated that includes both content and the output of embedded code. You can embed code like this:

1 + 1
[1] 2

3 Session Info

sessionInfo()
R version 4.2.3 (2023-03-15)
Platform: aarch64-apple-darwin20 (64-bit)
Running under: macOS Ventura 13.2.1

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] grid      stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] ggrain_0.0.3         rstan_2.26.22        StanHeaders_2.26.22 
 [4] rootSolve_1.8.2.3    cmdstanr_0.5.3       pdftools_3.3.3      
 [7] cowplot_1.1.1        lme4_1.1-32          Matrix_1.5-3        
[10] gridExtra_2.3        kableExtra_1.3.4     papaja_0.1.1        
[13] tinylabels_0.2.3     bcogsci_0.0.0.9000   hypr_0.2.3          
[16] tictoc_1.1           bayesplot_1.10.0     brms_2.19.0         
[19] Rcpp_1.0.10          bridgesampling_1.1-2 loo_2.6.0           
[22] ggplot2_3.4.2        extraDistr_1.9.1     purrr_1.0.1         
[25] tidyr_1.3.0          dplyr_1.1.1          MASS_7.3-58.2       

loaded via a namespace (and not attached):
  [1] backports_1.4.1      systemfonts_1.0.4    plyr_1.8.8          
  [4] igraph_1.4.2         splines_4.2.3        crosstalk_1.2.0     
  [7] TH.data_1.1-1        rstantools_2.3.1     inline_0.3.19       
 [10] digest_0.6.31        htmltools_0.5.5      fansi_1.0.4         
 [13] magrittr_2.0.3       checkmate_2.1.0      RcppParallel_5.1.7  
 [16] matrixStats_0.63.0   xts_0.13.1           sandwich_3.0-2      
 [19] svglite_2.1.1        askpass_1.1          prettyunits_1.1.1   
 [22] colorspace_2.1-0     rvest_1.0.3          rbibutils_2.2.13    
 [25] xfun_0.38            callr_3.7.3          crayon_1.5.2        
 [28] jsonlite_1.8.4       survival_3.5-3       zoo_1.8-12          
 [31] glue_1.6.2           gtable_0.3.3         emmeans_1.8.5       
 [34] webshot_0.5.4        V8_4.3.0             distributional_0.3.2
 [37] car_3.1-2            pkgbuild_1.4.0       abind_1.4-5         
 [40] scales_1.2.1         mvtnorm_1.1-3        qpdf_1.3.2          
 [43] rstatix_0.7.2        miniUI_0.1.1.1       viridisLite_0.4.1   
 [46] xtable_1.8-4         stats4_4.2.3         DT_0.27             
 [49] datawizard_0.7.1     htmlwidgets_1.6.2    httr_1.4.5          
 [52] threejs_0.3.3        posterior_1.4.1      ellipsis_0.3.2      
 [55] pkgconfig_2.0.3      farver_2.1.1         utf8_1.2.3          
 [58] labeling_0.4.2       polynom_1.4-1        tidyselect_1.2.0    
 [61] rlang_1.1.0          reshape2_1.4.4       later_1.3.0         
 [64] effectsize_0.8.3     munsell_0.5.0        tools_4.2.3         
 [67] cli_3.6.1            generics_0.1.3       broom_1.0.4         
 [70] evaluate_0.20        stringr_1.5.0        fastmap_1.1.1       
 [73] yaml_2.3.7           ggpp_0.5.2           processx_3.8.0      
 [76] knitr_1.42           nlme_3.1-162         mime_0.12           
 [79] projpred_2.5.0       pracma_2.4.2         xml2_1.3.3          
 [82] compiler_4.2.3       shinythemes_1.2.0    rstudioapi_0.14     
 [85] curl_5.0.0           gamm4_0.2-6          ggsignif_0.6.4      
 [88] tibble_3.2.1         stringi_1.7.12       highr_0.10          
 [91] ps_1.7.4             parameters_0.20.3    Brobdingnag_1.2-9   
 [94] lattice_0.20-45      nloptr_2.0.3         markdown_1.6        
 [97] shinyjs_2.1.0        tensorA_0.36.2       vctrs_0.6.1         
[100] pillar_1.9.0         lifecycle_1.0.3      Rdpack_2.4          
[103] estimability_1.4.1   insight_0.19.1       httpuv_1.6.9        
[106] R6_2.5.1             promises_1.2.0.1     codetools_0.2-19    
[109] boot_1.3-28.1        colourpicker_1.2.0   gtools_3.9.4        
[112] withr_2.5.0          shinystan_2.6.0      multcomp_1.4-23     
[115] mgcv_1.8-42          bayestestR_0.13.1    parallel_4.2.3      
[118] gghalves_0.1.4       coda_0.19-4          minqa_1.2.5         
[121] rmarkdown_2.21       carData_3.0-5        ggpubr_0.6.0        
[124] shiny_1.7.4          base64enc_0.1-3      dygraphs_1.1.1.6